package search.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import search.Parser;
import search.WebSpider;

/**
 * {@link WebSpider} implementation that collects program-page URLs from the
 * Stanford graduate admissions site.
 */
public class WebSpiderImpl implements WebSpider {

	/** Scheme and host prepended to the site-relative hrefs found on the index page. */
	private static final String BASE_URL = "https://gradadmissions.stanford.edu";

	/** Index page whose links are harvested. */
	private static final String PROGRAMS_URL = BASE_URL + "/programs";

	/**
	 * Connect/read timeout in milliseconds. Bounded so that a stalled
	 * connection fails instead of blocking the caller for days.
	 */
	private static final int TIMEOUT_MILLIS = 30 * 1000;

	private static final java.util.logging.Logger LOGGER =
			java.util.logging.Logger.getLogger(WebSpiderImpl.class.getName());

	/**
	 * Creates the parser that goes with this spider.
	 *
	 * @return a new {@code ParserImpl} instance
	 */
	public Parser getParser() {
		return new ParserImpl();
	}

	/**
	 * Fetches the programs index page and collects the absolute URL of every
	 * anchor whose {@code href} starts with {@code /programs/}, skipping the
	 * first match.
	 *
	 * <p>Despite the method name, the returned strings are page URLs, not HTML
	 * content.
	 *
	 * @return the collected URLs in document order; an empty list (never
	 *         {@code null}) when the index page could not be fetched
	 */
	public List<String> getHtmlFromWeb() {
		List<String> pages = new ArrayList<String>();
		try {
			Document document = Jsoup.connect(PROGRAMS_URL).timeout(TIMEOUT_MILLIS).get();
			Elements links = document.select("a[href^=/programs/]");
			// Per the original author's note, every matching link except the
			// first one is a program page, so iteration starts at index 1.
			for (int i = 1; i < links.size(); i++) {
				Element link = links.get(i);
				pages.add(BASE_URL + link.attr("href"));
			}
		} catch (IOException e) {
			// Report the failure with its cause and fall through to return the
			// (empty) list so callers can iterate without a null check.
			LOGGER.log(java.util.logging.Level.WARNING, "Failed to fetch " + PROGRAMS_URL, e);
		}
		return pages;
	}
}